x86: hvm: Allow configuration of the size of the mmio_hole.
author Don Slutz <dslutz@verizon.com>
Tue, 21 Oct 2014 12:42:26 +0000 (08:42 -0400)
committer Ian Campbell <ian.campbell@citrix.com>
Thu, 23 Oct 2014 10:47:49 +0000 (11:47 +0100)
If you add enough PCI devices, their MMIO may not all fit below 4G,
which may not be the layout the user wanted. This allows you to
increase the address space below 4G that PCI devices can use, so that
in more cases no MMIO has to be placed above 4G.

There are real PCI cards that do not support mmio over 4G, so if you
want to emulate them precisely, you may also need to increase the
space below 4G for them. There are drivers for these cards that also
do not work if they have their mmio space mapped above 4G.

This allows growing the MMIO hole to the size needed.

This may help with using pci passthru and HVM.

In libxl this is named mmio_hole_memkb; the corresponding xl.cfg option is mmio_hole.

Signed-off-by: Don Slutz <dslutz@verizon.com>
Acked-by: George Dunlap <george.dunlap@eu.citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
[ ijc -- fixed build error in xl_cmdimpl.c with s/%PRIu64/%ld/.
         Reworded title ]

docs/man/xl.cfg.pod.5
tools/firmware/hvmloader/config.h
tools/firmware/hvmloader/pci.c
tools/libxl/libxl.h
tools/libxl/libxl_create.c
tools/libxl/libxl_dm.c
tools/libxl/libxl_dom.c
tools/libxl/libxl_types.idl
tools/libxl/xl_cmdimpl.c

index 5830e09017307455a425a246f259ff37691d53ef..622ea53d9114cc36be674a07ec62a9cfc8bac706 100644 (file)
@@ -1164,6 +1164,21 @@ wallclock (i.e., real) time.
 
 =back
 
+=head3 Memory layout
+
+=over 4
+
+=item B<mmio_hole=MBYTES>
+
+Specifies the size, in MiB, of the MMIO hole below 4GiB.  Only valid for
+device_model_version = "qemu-xen".
+
+Must be at least 256 and at most 3840.
+
+Known good large value is 3072.
+
+=back
+
 =head3 Support for Paravirtualisation of HVM Guests
 
 The following options allow Paravirtualised features (such as devices)
index 80bea46d28ccbd11e252c88b7f01aaafe0197bf4..b838cf9c1730aec40e8522d98ee2b268282259e7 100644 (file)
@@ -53,7 +53,6 @@ extern struct bios_config ovmf_config;
 #define PCI_ISA_IRQ_MASK    0x0c20U /* ISA IRQs 5,10,11 are PCI connected */
 
 /* MMIO hole: Hardcoded defaults, which can be dynamically expanded. */
-#define PCI_MEM_START       0xf0000000
 #define PCI_MEM_END         0xfc000000
 
 extern unsigned long pci_mem_start, pci_mem_end;
index 3712988e645e5b049f05f0bdf9f6b4926dd53612..4e8d803b798739418277b1a07321401943da13df 100644 (file)
 #include <xen/memory.h>
 #include <xen/hvm/ioreq.h>
 #include <xen/hvm/hvm_xs_strings.h>
+#include <xen/hvm/e820.h>
 #include <stdbool.h>
 
-unsigned long pci_mem_start = PCI_MEM_START;
+unsigned long pci_mem_start = HVM_BELOW_4G_MMIO_START;
 unsigned long pci_mem_end = PCI_MEM_END;
 uint64_t pci_hi_mem_start = 0, pci_hi_mem_end = 0;
 
@@ -59,6 +60,7 @@ void pci_setup(void)
         uint64_t bar_sz;
     } *bars = (struct bars *)scratch_start;
     unsigned int i, nr_bars = 0;
+    uint64_t mmio_hole_size = 0;
 
     const char *s;
     /*
@@ -86,6 +88,10 @@ void pci_setup(void)
     printf("Relocating guest memory for lowmem MMIO space %s\n",
            allow_memory_relocate?"enabled":"disabled");
 
+    s = xenstore_read("platform/mmio_hole_size", NULL);
+    if ( s )
+        mmio_hole_size = strtoll(s, NULL, 0);
+
     /* Program PCI-ISA bridge with appropriate link routes. */
     isa_irq = 0;
     for ( link = 0; link < 4; link++ )
@@ -237,26 +243,49 @@ void pci_setup(void)
         pci_writew(devfn, PCI_COMMAND, cmd);
     }
 
-    /*
-     * At the moment qemu-xen can't deal with relocated memory regions.
-     * It's too close to the release to make a proper fix; for now,
-     * only allow the MMIO hole to grow large enough to move guest memory
-     * if we're running qemu-traditional.  Items that don't fit will be
-     * relocated into the 64-bit address space.
-     *
-     * This loop now does the following:
-     * - If allow_memory_relocate, increase the MMIO hole until it's
-     *   big enough, or until it's 2GiB
-     * - If !allow_memory_relocate, increase the MMIO hole until it's
-     *   big enough, or until it's 2GiB, or until it overlaps guest
-     *   memory
-     */
-    while ( (mmio_total > (pci_mem_end - pci_mem_start)) 
-            && ((pci_mem_start << 1) != 0)
-            && (allow_memory_relocate
-                || (((pci_mem_start << 1) >> PAGE_SHIFT)
-                    >= hvm_info->low_mem_pgend)) )
-        pci_mem_start <<= 1;
+    if ( mmio_hole_size )
+    {
+        uint64_t max_ram_below_4g = (1ULL << 32) - mmio_hole_size;
+
+        if ( max_ram_below_4g > HVM_BELOW_4G_MMIO_START )
+        {
+            printf("max_ram_below_4g=0x"PRIllx
+                   " too big for mmio_hole_size=0x"PRIllx
+                   " has been ignored.\n",
+                   PRIllx_arg(max_ram_below_4g),
+                   PRIllx_arg(mmio_hole_size));
+        }
+        else
+        {
+            pci_mem_start = max_ram_below_4g;
+            printf("pci_mem_start=0x%lx (was 0x%x) for mmio_hole_size=%lu\n",
+                   pci_mem_start, HVM_BELOW_4G_MMIO_START,
+                   (long)mmio_hole_size);
+        }
+    }
+    else
+    {
+        /*
+         * At the moment qemu-xen can't deal with relocated memory regions.
+         * It's too close to the release to make a proper fix; for now,
+         * only allow the MMIO hole to grow large enough to move guest memory
+         * if we're running qemu-traditional.  Items that don't fit will be
+         * relocated into the 64-bit address space.
+         *
+         * This loop now does the following:
+         * - If allow_memory_relocate, increase the MMIO hole until it's
+         *   big enough, or until it's 2GiB
+         * - If !allow_memory_relocate, increase the MMIO hole until it's
+         *   big enough, or until it's 2GiB, or until it overlaps guest
+         *   memory
+         */
+        while ( (mmio_total > (pci_mem_end - pci_mem_start))
+                && ((pci_mem_start << 1) != 0)
+                && (allow_memory_relocate
+                    || (((pci_mem_start << 1) >> PAGE_SHIFT)
+                        >= hvm_info->low_mem_pgend)) )
+            pci_mem_start <<= 1;
+    }
 
     if ( mmio_total > (pci_mem_end - pci_mem_start) )
     {
index 28b614fb93058ad71d3c19e73f251d21b6028c15..c3451bdc429f61eeccf7048339745daf6703d24f 100644 (file)
 #define LIBXL_HAVE_BUILDINFO_HVM_VIRIDIAN_ENABLE_DISABLE 1
 #define LIBXL_BUILDINFO_HVM_VIRIDIAN_ENABLE_DISABLE_WIDTH 64
 
+/*
+ * libxl_domain_build_info has the u.hvm.mmio_hole_memkb field.
+ */
+#define LIBXL_HAVE_BUILDINFO_HVM_MMIO_HOLE_MEMKB 1
+
 /*
  * libxl ABI compatibility
  *
index 4b33d073721d3f63c111f1a0dcbea259021831b2..b1ff5aee7d64349401a13a949eebe08d473cf751 100644 (file)
@@ -23,6 +23,7 @@
 #include <xc_dom.h>
 #include <xenguest.h>
 #include <xen/hvm/hvm_info_table.h>
+#include <xen/hvm/e820.h>
 
 int libxl__domain_create_info_setdefault(libxl__gc *gc,
                                          libxl_domain_create_info *c_info)
@@ -226,6 +227,8 @@ int libxl__domain_build_info_setdefault(libxl__gc *gc,
     case LIBXL_DOMAIN_TYPE_HVM:
         if (b_info->shadow_memkb == LIBXL_MEMKB_DEFAULT)
             b_info->shadow_memkb = 0;
+        if (b_info->u.hvm.mmio_hole_memkb == LIBXL_MEMKB_DEFAULT)
+            b_info->u.hvm.mmio_hole_memkb = 0;
 
         if (!b_info->u.hvm.vga.kind)
             b_info->u.hvm.vga.kind = LIBXL_VGA_INTERFACE_TYPE_CIRRUS;
@@ -429,13 +432,25 @@ int libxl__domain_build(libxl__gc *gc,
         vments[4] = "start_time";
         vments[5] = libxl__sprintf(gc, "%lu.%02d", start_time.tv_sec,(int)start_time.tv_usec/10000);
 
-        localents = libxl__calloc(gc, 7, sizeof(char *));
-        localents[0] = "platform/acpi";
-        localents[1] = libxl_defbool_val(info->u.hvm.acpi) ? "1" : "0";
-        localents[2] = "platform/acpi_s3";
-        localents[3] = libxl_defbool_val(info->u.hvm.acpi_s3) ? "1" : "0";
-        localents[4] = "platform/acpi_s4";
-        localents[5] = libxl_defbool_val(info->u.hvm.acpi_s4) ? "1" : "0";
+        localents = libxl__calloc(gc, 9, sizeof(char *));
+        i = 0;
+        localents[i++] = "platform/acpi";
+        localents[i++] = libxl_defbool_val(info->u.hvm.acpi) ? "1" : "0";
+        localents[i++] = "platform/acpi_s3";
+        localents[i++] = libxl_defbool_val(info->u.hvm.acpi_s3) ? "1" : "0";
+        localents[i++] = "platform/acpi_s4";
+        localents[i++] = libxl_defbool_val(info->u.hvm.acpi_s4) ? "1" : "0";
+        if (info->u.hvm.mmio_hole_memkb) {
+            uint64_t max_ram_below_4g =
+                (1ULL << 32) - (info->u.hvm.mmio_hole_memkb << 10);
+
+            if (max_ram_below_4g <= HVM_BELOW_4G_MMIO_START) {
+                localents[i++] = "platform/mmio_hole_size";
+                localents[i++] =
+                    libxl__sprintf(gc, "%"PRIu64,
+                                   info->u.hvm.mmio_hole_memkb << 10);
+            }
+        }
 
         break;
     case LIBXL_DOMAIN_TYPE_PV:
index 14dc9fa426c5664dc12c9a0e7ec8e1aacd1eb5c3..3e191c3b263b968e049cca37d54c9d7a0a07dafc 100644 (file)
@@ -18,6 +18,7 @@
 #include "libxl_osdeps.h" /* must come before any other headers */
 
 #include "libxl_internal.h"
+#include <xen/hvm/e820.h>
 
 static const char *libxl_tapif_script(libxl__gc *gc)
 {
@@ -416,6 +417,7 @@ static char ** libxl__build_device_model_args_new(libxl__gc *gc,
     const libxl_vnc_info *vnc = libxl__dm_vnc(guest_config);
     const libxl_sdl_info *sdl = dm_sdl(guest_config);
     const char *keymap = dm_keymap(guest_config);
+    char *machinearg;
     flexarray_t *dm_args;
     int i, connection, devid;
     uint64_t ram_size;
@@ -720,10 +722,24 @@ static char ** libxl__build_device_model_args_new(libxl__gc *gc,
             /* Switching here to the machine "pc" which does not add
              * the xen-platform device instead of the default "xenfv" machine.
              */
-            flexarray_append(dm_args, "pc,accel=xen");
+            machinearg = libxl__sprintf(gc, "pc,accel=xen");
         } else {
-            flexarray_append(dm_args, "xenfv");
+            machinearg = libxl__sprintf(gc, "xenfv");
         }
+        if (b_info->u.hvm.mmio_hole_memkb) {
+            uint64_t max_ram_below_4g = (1ULL << 32) -
+                (b_info->u.hvm.mmio_hole_memkb << 10);
+
+            if (max_ram_below_4g > HVM_BELOW_4G_MMIO_START) {
+                LOG(WARN, "mmio_hole_memkb=%"PRIu64
+                    " invalid ignored.\n",
+                    b_info->u.hvm.mmio_hole_memkb);
+            } else {
+                machinearg = libxl__sprintf(gc, "%s,max-ram-below-4g=%"PRIu64,
+                                            machinearg, max_ram_below_4g);
+            }
+        }
+        flexarray_append(dm_args, machinearg);
         for (i = 0; b_info->extra_hvm && b_info->extra_hvm[i] != NULL; i++)
             flexarray_append(dm_args, b_info->extra_hvm[i]);
         break;
index 9d830e11bde068aaeb36d2998aafe1b7649d884d..74ea84b74a1fd0ddd26df9a022b3463d0605e1e0 100644 (file)
@@ -23,6 +23,7 @@
 #include <xc_dom.h>
 #include <xen/hvm/hvm_info_table.h>
 #include <xen/hvm/hvm_xs_strings.h>
+#include <xen/hvm/e820.h>
 
 libxl_domain_type libxl__domain_type(libxl__gc *gc, uint32_t domid)
 {
@@ -800,6 +801,13 @@ int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
     args.mem_size = (uint64_t)(info->max_memkb - info->video_memkb) << 10;
     args.mem_target = (uint64_t)(info->target_memkb - info->video_memkb) << 10;
     args.claim_enabled = libxl_defbool_val(info->claim_mode);
+    if (info->u.hvm.mmio_hole_memkb) {
+        uint64_t max_ram_below_4g = (1ULL << 32) -
+            (info->u.hvm.mmio_hole_memkb << 10);
+
+        if (max_ram_below_4g < HVM_BELOW_4G_MMIO_START)
+            args.mmio_size = info->u.hvm.mmio_hole_memkb << 10;
+    }
     if (libxl__domain_firmware(gc, info, &args)) {
         LOG(ERROR, "initializing domain firmware failed");
         goto out;
index fdda7618fd76efce684464860dcdbdac0b7904a6..ca3f72479dbec2e24a2cb22df361a77f40337b4c 100644 (file)
@@ -413,6 +413,7 @@ libxl_domain_build_info = Struct("domain_build_info",[
                                        ("timeoffset",       string),
                                        ("hpet",             libxl_defbool),
                                        ("vpt_align",        libxl_defbool),
+                                       ("mmio_hole_memkb",  MemKB),
                                        ("timer_mode",       libxl_timer_mode),
                                        ("nested_hvm",       libxl_defbool),
                                        ("smbios_firmware",  string),
index 08826443926114689a735b129dba40efbc2ba6e7..3c9f14686f7142156a2e1ab92a2b82de8406008c 100644 (file)
@@ -34,6 +34,7 @@
 #include <ctype.h>
 #include <inttypes.h>
 #include <limits.h>
+#include <xen/hvm/e820.h>
 
 #include "libxl.h"
 #include "libxl_utils.h"
@@ -1191,6 +1192,18 @@ static void parse_config_data(const char *config_source,
             exit(-ERROR_FAIL);
         }
 
+        if (!xlu_cfg_get_long(config, "mmio_hole", &l, 0)) {
+            uint64_t mmio_hole_size;
+
+            b_info->u.hvm.mmio_hole_memkb = l * 1024;
+            mmio_hole_size = b_info->u.hvm.mmio_hole_memkb * 1024;
+            if (mmio_hole_size < HVM_BELOW_4G_MMIO_LENGTH ||
+                mmio_hole_size > HVM_BELOW_4G_MMIO_START) {
+                fprintf(stderr,
+                        "ERROR: invalid value %ld for \"mmio_hole\"\n", l);
+                exit (1);
+            }
+        }
         if (!xlu_cfg_get_long(config, "timer_mode", &l, 1)) {
             const char *s = libxl_timer_mode_to_string(l);
             fprintf(stderr, "WARNING: specifying \"timer_mode\" as an integer is deprecated. "